---
title: "Municipal-Level Analysis"
author: "Sally Sharif"
date: "2026-01-18"
output: word_document
---

```{r loading packages and data}
rm(list = ls())

# loading required packages 
library(table1)
library(corrtable)
library(ggplot2)
library(dplyr)
library(tidyr)
library(MASS)  
library(ivreg) 
library(MASS)
library(sandwich)
library(lmtest)
library(car)
library(texreg)
library(pscl)

# loading the data
obj_name <- load("municipal_level_data.RData")
data_mun0 <- get(obj_name)
nrow(data_mun0) # 1,101
```

```{r descriptive graph}
# Manuscript Figure 2
# getting the proportion of homicides for municipalities with and without resettled ex-combatants
desdata <- data_mun0 %>% 
  mutate(excombatant_present_binary = as.character(excombatant_present)) %>%
  pivot_longer(
    cols = matches("^prop_homicides_\\d{4}$"),
    names_to = "year",
    values_to = "prop_homicides") %>%
  mutate(year = as.integer(sub("prop_homicides_", "", year))) %>%
  group_by(excombatant_present_binary, year) %>%
  summarise(
    totalprop_homicides = sum(prop_homicides, na.rm = TRUE),
    .groups = "drop")

# plotting
ggplot(desdata, aes(x = year, y = totalprop_homicides, colour = excombatant_present_binary)) +
  geom_line() + geom_point() + theme_classic() +
  ylab("Number of Homicides per 100,000 inhabitants") +
  labs(color = "Resettled \n ex-combatants") +
  scale_colour_grey(start = 0.6, end = 0.1) +
  theme(text = element_text(family = "Times New Roman"))
```

```{r merging with the IDP data}
# loading the IDP data (based on reviewer's suggestion)
obj_name2 <- load("idp_arrival_data.RData")
arrival_data <- get(obj_name2)
nrow(arrival_data) # 1,048,575

# cleaning the data
arrivals_2016 <- arrival_data %>%
  filter(VIGENCIA == 2016) %>%
  mutate(
    COD_CIUDAD_MUNI = sprintf("%05s", as.character(COD_CIUDAD_MUNI)),
    PER_LLEGADA = as.numeric(PER_LLEGADA)) %>% group_by(COD_CIUDAD_MUNI) %>%
  summarise(idp_arrival_2016 = sum(PER_LLEGADA, na.rm = TRUE), .groups = "drop") %>%
  mutate(idp_arrival_2016 = as.integer(idp_arrival_2016)) %>%
  dplyr::select(COD_CIUDAD_MUNI, idp_arrival_2016)

nrow(arrivals_2016) # 1,000
# removing municipalities with no homicides
data_mun0_filtered <- data_mun0 %>% 
    filter(avg_non_excomb_homicides > 0) 

nrow(data_mun0_filtered) # 990
# left-join and set unmatched PER_LLEGADA to 0
data_mun <- data_mun0_filtered %>%
  left_join(arrivals_2016, by = c("mun_code" = "COD_CIUDAD_MUNI")) %>%
  mutate(idp_arrival_2016 = replace_na(idp_arrival_2016, 0))

nrow(data_mun) # 990
```

```{r descriptive stats}
### Appendix Table 2
table1(~ excomb_assassinated_17to20 + factor(protection) + excomb_numbers + collective_project + civilian_collaboration + population_2016 + factor(urban) + total_homicides_17to20 + non_excomb_homicides_17to20 + homicides_2016 + coca_2016 + inst_performance_2016 + factor(excombatant_present) + insurgent_attacks_2016 + gov_attacks_2016 + idp_arrival_2016, data = data_mun)

### Appendix Table 3
# correlation matrix 
corr_data <- data_mun %>%
  dplyr::select(excomb_assassinated_17to20, protection, excomb_numbers,
    collective_project, civilian_collaboration, total_homicides_17to20,
    non_excomb_homicides_17to20, homicides_2016,
    coca_2016, population_2016, inst_performance_2016, 
    insurgent_attacks_2016, gov_attacks_2016, idp_arrival_2016) %>%
  dplyr::mutate(protection = as.numeric(protection))

correlation_matrix(corr_data, digits = 2, 
                   use = "lower", replace_diagonal = TRUE)
```

```{r goodness of fit tests}
fit <- goodfit(data_mun$excomb_assassinated_17to20)
summary(fit)
# Appendix Figure 2
rootogram(fit)
Ord_plot(data_mun$excomb_assassinated_17to20)
goodfittest <- goodfit(data_mun$excomb_assassinated_17to20, type = "nbinomial")
summary(goodfittest)
distplot(data_mun$excomb_assassinated_17to20, type = "nbinomial")
distplot(data_mun$excomb_assassinated_17to20, type = "poisson")
```

```{r negative binomial models}
### Manuscript Table 1
## negative binomial estimates of homicides and ex-combatant assassinations
# homicides (Column 1)
negbin_model1 <- glm.nb(total_homicides_17to20 ~ log(excomb_numbers + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)

hc_vcov1  <- vcovHC(negbin_model1, type = "HC1")   
coeftest(negbin_model1, vcov. = hc_vcov1)
cl_vcov1 <- vcovCL(negbin_model1, cluster = ~ province, type = "HC1")
coeftest(negbin_model1, vcov. = cl_vcov1)

# homicides with the interaction term for protection (Column 2)
negbin_model2 <- glm.nb(total_homicides_17to20 ~ log(excomb_numbers + 1)*protection + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
hc_vcov2  <- vcovHC(negbin_model2, type = "HC1")   
coeftest(negbin_model2, vcov. = hc_vcov2)
cl_vcov2 <- vcovCL(negbin_model2, cluster = ~ province, type = "HC1")
coeftest(negbin_model2, vcov. = cl_vcov2)

# ex-combatant assassinations (Column 3)
negbin_model3 <- glm.nb(excomb_assassinated_17to20 ~ log(excomb_numbers + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)

hc_vcov3  <- vcovHC(negbin_model3, type = "HC1")   
coeftest(negbin_model3, vcov. = hc_vcov3)
cl_vcov3 <- vcovCL(negbin_model3, cluster = ~ province, type = "HC1")
coeftest(negbin_model3, vcov. = cl_vcov3)

# ex-combatants assassinations with the interaction term for protection (Column 4)
negbin_model4 <- glm.nb(excomb_assassinated_17to20 ~ log(excomb_numbers + 1)*protection + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 + 
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)

hc_vcov4  <- vcovHC(negbin_model4, type = "HC1")   
coeftest(negbin_model4, vcov. = hc_vcov4)
cl_vcov4 <- vcovCL(negbin_model4, cluster = ~ province, type = "HC1")
coeftest(negbin_model4, vcov. = cl_vcov4)

# non-ex-combatant homicides (Column 5)
negbin_model5 <- glm.nb(non_excomb_homicides_17to20 ~ log(excomb_numbers + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 + 
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)

hc_vcov5  <- vcovHC(negbin_model5, type = "HC1")   
coeftest(negbin_model5, vcov. = hc_vcov5)
cl_vcov5 <- vcovCL(negbin_model5, cluster = ~ province, type = "HC1")
coeftest(negbin_model5, vcov. = cl_vcov5)


# non-ex-combatant homicides with interaction term for protection (Column 6)
negbin_model6 <- glm.nb(non_excomb_homicides_17to20 ~ log(excomb_numbers + 1)*protection + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 + 
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)

hc_vcov6  <- vcovHC(negbin_model6, type = "HC1")   
coeftest(negbin_model6, vcov. = hc_vcov5)
cl_vcov6 <- vcovCL(negbin_model6, cluster = ~ province, type = "HC1")
coeftest(negbin_model6, vcov. = cl_vcov6)
# Variance Inflation Factor (VIF)
vif_values <- vif(negbin_model6)
print(vif_values)

texreg(list(negbin_model1, negbin_model2, negbin_model3, negbin_model4, negbin_model5, negbin_model6), digits = 3)
```

```{r negative binomial models of assassinations}
### Manuscript Table 2
## negative binomial estimates of ex-combatant assassinations
# collective projects (Column 1)
negbin_model60 <- glm.nb(excomb_assassinated_17to20 ~ log(collective_project + 1),
                                        data = data_mun)
summary(negbin_model60)

# collective projects and controls (Column 5)
negbin_model6 <- glm.nb(excomb_assassinated_17to20 ~ log(collective_project + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model6)

# collective projects with the interaction term (Column 2)
negbin_model70 <- glm.nb(excomb_assassinated_17to20 ~ log(collective_project+1)*protection,
                                        data = data_mun)
summary(negbin_model70)
# collective projects with the interaction term and controls (Column 6)
negbin_model7 <- glm.nb(excomb_assassinated_17to20 ~ log(collective_project+1)*protection + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 + 
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model7)

# civilian collaboration (Column 3)
negbin_model80 <- glm.nb(excomb_assassinated_17to20 ~ log(civilian_collaboration+1),
                                        data = data_mun)
summary(negbin_model80)

# civilian collaboration and controls (Column 7)
negbin_model8 <- glm.nb(excomb_assassinated_17to20 ~ log(civilian_collaboration+1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model8)

# civilian collaboration with the interaction term (Column 4)
negbin_model90 <- glm.nb(excomb_assassinated_17to20 ~ log(civilian_collaboration+1)*protection,
                                        data = data_mun)
summary(negbin_model90)

# civilian collaboration with the interaction term and controls (Column 8)
negbin_model9 <- glm.nb(excomb_assassinated_17to20 ~ log(civilian_collaboration+1)*protection + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 + 
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model9)

texreg(list(negbin_model60,negbin_model6,negbin_model70,negbin_model7,negbin_model80,negbin_model8,negbin_model90,negbin_model9), digits = 3)
```

```{r zero-inflated models}
### Appendix Table 4
## zero-inflated negative binomial models of ex-combatant assassinations
# number of ex-combatants (Column 1)
zinb_model1 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(excomb_numbers + 1) +
    log(population_2016) + log(coca_2016 + 1) +
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

# collective projects (Column 2)
zinb_model2 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(collective_project + 1) +
    log(population_2016) + log(coca_2016 + 1) +
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

# civilian collaboration (Column 3)
zinb_model3 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(civilian_collaboration + 1) +
    log(population_2016) + log(coca_2016 + 1) +
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

# number of ex-combatants × protection (Column 4)
zinb_model4 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(excomb_numbers + 1) * protection +
    log(population_2016) + log(coca_2016 + 1) +
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

# collective projects × protection (Column 5)
zinb_model5 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(collective_project + 1) * protection +
    log(population_2016) + log(coca_2016 + 1) + 
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

# civilian collaboration × protection (Column 6)
zinb_model6 <- zeroinfl(
  excomb_assassinated_17to20 ~ log(civilian_collaboration + 1) * protection +
    log(population_2016) + log(coca_2016 + 1) +
    inst_performance_2016 + log(homicides_2016 + 1) +
    insurgent_attacks_2016 + gov_attacks_2016 + log(idp_arrival_2016 + 1) | 1,
  dist = "negbin", data = data_mun)

texreg(list(zinb_model1, zinb_model2, zinb_model3, zinb_model4, zinb_model5, zinb_model6))
```

```{r Vuong test}
### Appendix Table 5
vuong(negbin_model4, zinb_model4)
```

```{r logit models}
### Appendix Table 6
# making a binary DV (any assassination in municipality-year)
data_mun <- data_mun %>%
  mutate(excomb_assassinated_binary = ifelse(excomb_assassinated_17to20 > 0, 1, 0))

# number of ex-combatants (Column 1)
logit_model1 <- glm(
  excomb_assassinated_binary ~ log(excomb_numbers + 1),
  family = binomial(link = "logit"), data = data_mun)

# collective projects (Column 2)
logit_model2 <- glm(
  excomb_assassinated_binary ~ log(collective_project + 1),
  family = binomial(link = "logit"), data = data_mun)

# civilian collaboration (Column 3)
logit_model3 <- glm(
  excomb_assassinated_binary ~ log(civilian_collaboration + 1),
  family = binomial(link = "logit"), data = data_mun)

# number of ex-combatants × protection (Column 4)
logit_model4 <- glm(
  excomb_assassinated_binary ~ log(excomb_numbers + 1) * protection,
  family = binomial(link = "logit"), data = data_mun)

# collective projects × protection (Column 5)
logit_model5 <- glm(
  excomb_assassinated_binary ~ log(collective_project + 1) * protection,
  family = binomial(link = "logit"), data = data_mun)

# civilian collaboration × protection (Column 6)
logit_model6 <- glm(
  excomb_assassinated_binary ~ log(civilian_collaboration + 1) * protection,
  family = binomial(link = "logit"), data = data_mun)

texreg(list(logit_model1, logit_model2, logit_model3, logit_model4, logit_model5, logit_model6))
```


```{r IV estimation}
### Appendix Table 8
## IV estimation for the causal effect of ex-combatants presence on assassinations
# first-stage OLS regression to test instrument relevance (Column 1)
first_stage <- lm(log(excomb_numbers + 1) ~ avg_insurgents_12to16 + 
                     log(population_2016) + 
                     log(coca_2016 + 1) +
                     inst_performance_2016 +
                     log(homicides_2016 + 1) +
                     gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(first_stage)

data_mun$residuals_stage1 <- residuals(first_stage)

# F-test on instrument
linearHypothesis(first_stage, "avg_insurgents_12to16 = 0")

# second-stage (Column 2)
iv_nb_model <- glm.nb(excomb_assassinated_17to20 ~ log(excomb_numbers + 1) + 
                                        residuals_stage1 +
                                        log(population_2016) + 
                                        log(coca_2016 + 1) +
                                        inst_performance_2016 +
                                        log(homicides_2016 + 1) +
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(iv_nb_model)

texreg(list(first_stage, iv_nb_model))
```


```{r effect on armed group attacks, warning=FALSE}
### Appendix Table 7
## negative binomial estimates of FARC dissident and criminal group (BACRIM) attacks
# dissidents (Column 1)
negbin_model20 <- glm.nb(avg_dissidents_17to20 ~ log(excomb_numbers + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model20)

hc_vcov20  <- vcovHC(negbin_model20, type = "HC1")   
coeftest(negbin_model20, vcov. = hc_vcov20)

# criminal groups (Column 2)
negbin_model22 <- glm.nb(avg_bacrim_17to20 ~ log(excomb_numbers + 1) + 
                                        log(population_2016) + 
                                        log(coca_2016+1) +
                                        inst_performance_2016 +
                                        log(homicides_2016+1) +
                                        insurgent_attacks_2016 + 
                                        gov_attacks_2016 + log(idp_arrival_2016+1),
                                        data = data_mun)
summary(negbin_model22)

hc_vcov22  <- vcovHC(negbin_model22, type = "HC1")   
coeftest(negbin_model22, vcov. = hc_vcov22)


texreg(list(negbin_model20,negbin_model22))
```
